Updated 2021-08-19 13:59:05
Models:
Log-Linear (power law): \(C=\alpha Q^{\beta}\) or the linearized version of \(log(C) = \beta log(\alpha) + log(Q)\).
Segmented: (with one break point)
Breakpoints are determined through an iterative process and identified when the linear relationship changes. The model is estimated simultaneously yielding point estimates and relevant approximate standard errors of all the model parameters, including the break-points.
# To check and install if needed packages
check.packages <- function(pkg){
new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if (length(new.pkg))
install.packages(new.pkg, dependencies = TRUE)
sapply(pkg, require, character.only = TRUE)
}
pkg=c("segmented","gvlma")
check.packages(pkg)
library(segmented)
library(gvlma)
#' Fitting models to Concentration-Discharge data using several different
#' models/methods and extracting relevat information
#' @param logQ log transformed paired discharge value
#' @param logC log transformed paired concentration value
#' @param Q50 median value of daily discharge
#' @param plot default = TRUE; identifies if you want to plot results.
#' @param models default = c("log-linear","segmented","moatar"); identified
#' what models to plot
#' @param plot.CI default = TRUE; If the models are plotted a 95% confidence
#' interval will also be plotted
#' @param CI.level default = 0.95; confidence interval level
#' @param plot.lwd default = 1.5; model line thickness
#' @param legend.pos default = "topleft"; legend position if models are plotted
#' @param print.rslt defualt = TRUE; print output table.
#' @param ... other plotting variables see ?plot (i.e. pch, lwd, bg, col, ylim, etc.)
CQ_fun=function(logQ,logC,Q50,plot=TRUE,
models=c("log-linear","segmented","moatar"),
plot.CI=TRUE,
CI.level=0.95,
plot.lwd=1.5,
legend.pos="topleft",
print.rslt=TRUE,
xlab="Discharge",
ylab="Concentration",...){
# Power Law
loglog.mod=lm(logC~logQ)
loglog.mod.sum=summary(loglog.mod)
# Check assumptions
LL.assump=gvlma(loglog.mod)
asumpt.rslt=as.character(ifelse(LL.assump$GlobalTest$GlobalStat4$pvalue<0.05,"No","Yes"))
# variabled of interest
loglog.beta=as.numeric(coef(loglog.mod)[2])
loglog.alpha=as.numeric(coef(loglog.mod)[1])
LL.rslt=data.frame(LL.R2=loglog.mod.sum$r.squared,
LL.R2.adj=loglog.mod.sum$adj.r.squared,
LL.RMSE=loglog.mod.sum$sigma,
LL.beta=loglog.beta,
LL.beta.LCI=as.numeric(confint(loglog.mod)[2,1]),
LL.beta.UCI=as.numeric(confint(loglog.mod)[2,2]),
LL.beta.SE=loglog.mod.sum$coefficients[2,2],
LL.beta.tval=loglog.mod.sum$coefficients[2,3],
LL.beta.pval=loglog.mod.sum$coefficients[2,4],
LL.alpha=loglog.alpha,
LL.AIC=AIC(loglog.mod),
LL.BIC=BIC(loglog.mod),
LL.LogLik=as.numeric(logLik(loglog.mod)),
LL.assumpt.pass=asumpt.rslt)
# Segmented
# Picks 1st breakpoint
loglog.mod.seg=segmented(loglog.mod,seg.Z=~logQ,npsi=1)
loglog.mod.seg.sum=summary(loglog.mod.seg)
# is there a breakpoint?
if(is.null(loglog.mod.seg$psi)){
seg.rslt=data.frame(seg.bk.est=NA,
seg.bk.SE=NA,
seg.R2=NA,
seg.R2.adj=NA,
seg.RMSE=NA,
seg.AIC=NA,
seg.BIC=NA,
seg.LogLik=NA)
}else{
seg.psi.est=loglog.mod.seg$psi[1,c(2)]
seg.psi.SE=seg.psi=loglog.mod.seg$psi[1,c(3)]
seg.rslt=data.frame(seg.bk.est=exp(seg.psi.est),
seg.bk.SE=exp(seg.psi.SE),
seg.R2=loglog.mod.seg.sum$r.squared,
seg.R2.adj=loglog.mod.seg.sum$adj.r.squared,
seg.RMSE=loglog.mod.seg.sum$sigma,
seg.AIC=AIC(loglog.mod.seg),
seg.BIC=BIC(loglog.mod.seg),
seg.LogLik=as.numeric(logLik(loglog.mod.seg)))
}
# Moatar
logQ.m=logQ[logQ<log(Q50)]
logC.m=logC[logQ<log(Q50)]
loglog.mod.inf=lm(logC.m~logQ.m)
loglog.mod.inf.sum=summary(loglog.mod.inf)
CQ50.inf=predict(loglog.mod.inf,data.frame(logQ.m=log(Q50)))
logQ.m=logQ[logQ>log(Q50)]
logC.m=logC[logQ>log(Q50)]
loglog.mod.sup=lm(logC.m~logQ.m)
loglog.mod.sup.sum=summary(loglog.mod.sup)
CQ50.sup=predict(loglog.mod.sup,data.frame(logQ.m=log(Q50)))
moatar.rslt=data.frame(Q50=Q50,
moatar.beta.inf=as.numeric(coef(loglog.mod.inf)[2]),
moatar.beta.inf.tval=loglog.mod.inf.sum$coefficients[2,3],
moatar.beta.inf.pval=loglog.mod.inf.sum$coefficients[2,4],
moatar.R2.inf=loglog.mod.inf.sum$r.squared,
moatar.R2.adj.inf=loglog.mod.inf.sum$adj.r.squared,
moatar.RMSE.inf=loglog.mod.inf.sum$sigma,
moatar.beta.sup=as.numeric(coef(loglog.mod.sup)[2]),
moatar.beta.sup.tval=loglog.mod.sup.sum$coefficients[2,3],
moatar.beta.sup.pval=loglog.mod.sup.sum$coefficients[2,4],
moatar.R2.sup=loglog.mod.sup.sum$r.squared,
moatar.R2.adj.sup=loglog.mod.sup.sum$adj.r.squared,
moatar.RMSE.sup=loglog.mod.sup.sum$sigma,
moatar.C_Q50=exp(mean(c(CQ50.inf,CQ50.sup))))
# ratio of CV Conc and CV Discharge
Cvc=(sd(exp(logC),na.rm=T)/mean(exp(logC),na.rm=T))*100
Cvq=(sd(exp(logQ),na.rm=T)/mean(exp(logQ),na.rm=T))*100
CVcq=data.frame(CVc_CVq=Cvc/Cvq)
rslt.all=cbind(LL.rslt,seg.rslt,moatar.rslt,CVcq)
# Plotting
c.val=exp(logC)
q.val=exp(logQ)
if(plot==T){
plot(c.val~q.val,log="xy",...)
if(sum(match(models,"log-linear"),na.rm=T)==1){
x.val=seq(min(logQ,na.rm=T),max(logQ,na.rm=T),length.out=70)
LL.pred=predict(loglog.mod,data.frame(logQ=x.val),interval="confidence",
level=CI.level)
lines(exp(x.val),exp(LL.pred[,1]),col="red",lwd=plot.lwd)
if(plot.CI==T){
lines(exp(x.val),exp(LL.pred[,2]),col="red",lty=2)
lines(exp(x.val),exp(LL.pred[,3]),col="red",lty=2)
}
}
if(is.null(loglog.mod.seg$psi)==F){
if(sum(match(models,"segmented"),na.rm=T)==1){
x.val=seq(min(logQ,na.rm=T),max(logQ,na.rm=T),length.out=70)
seg.pred=predict(loglog.mod.seg,data.frame(logQ=x.val),
interval="confidence",level=CI.level)
lines(exp(x.val),exp(seg.pred[,1]),col="blue",lwd=plot.lwd)
if(plot.CI==T){
lines(exp(x.val),exp(seg.pred[,2]),col="blue",lty=2)
lines(exp(x.val),exp(seg.pred[,3]),col="blue",lty=2)
}
}
}
if(sum(match(models,"moatar"),na.rm=T)==1){
abline(v=Q50)
x.val=seq(min(logQ,na.rm=T),log(Q50),length.out=50)
LL.inf=predict(loglog.mod.inf,data.frame(logQ.m=x.val),
interval="confidence",level=CI.level)
lines(exp(x.val),exp(LL.inf[,1]),col="forestgreen",lwd=plot.lwd)
if(plot.CI==T){
lines(exp(x.val),exp(LL.inf[,2]),col="forestgreen",lty=2)
lines(exp(x.val),exp(LL.inf[,3]),col="forestgreen",lty=2)
}
x.val=seq(log(Q50),max(logQ,na.rm=T),length.out=50)
LL.sup=predict(loglog.mod.sup,data.frame(logQ.m=x.val),
interval="confidence",level=CI.level)
lines(exp(x.val),exp(LL.sup[,1]),col="forestgreen")
if(plot.CI==T){
lines(exp(x.val),exp(LL.sup[,2]),col="forestgreen",lty=2)
lines(exp(x.val),exp(LL.sup[,3]),col="forestgreen",lty=2)
}
}
mod.col=c("red","blue","forestgreen")
mod.col=mod.col[match(models,c("log-linear","segmented","moatar"))]
legend.pos=if(is.na(legend.pos)==T){"topleft"}else(legend.pos)
legend(legend.pos,
legend=models,
lty=1,
col=mod.col,
ncol=1,bty="n",y.intersp=1,x.intersp=0.75,
xpd=NA,xjust=0.5,yjust=0.5,title="Model",title.adj = 0)
}
if(print.rslt==TRUE){return(rslt.all)}
}
Output includes measure of relative and absolute fit, slopes and intercepts and breakpoints as a data.frame.
LL.R2 : Log-linear R2LL.R2.adj : Log-linear adjusted R2LL.RMSE : Log-linear Root Mean Square ErrorLL.beta : Log-linear slopeLL.beta.SE: Log-linear slope standard errorLL.beta.tval : Log-linear slope t-value (different from zero)LL.beta.pval : Log-linear slope p-value (different from zero)LL.alpha : Log-linear interceptLL.AIC: Log-Linear AICLL.BIC: Log-Linear BICLL.LogLik: Log-Linear log-likelihood (used to calculate AIC and BIC)LL.assumpt.pass: uses gvlma package to test linear model assumptions, extracts global test decision.seg.bk.est : Segmented regression break point (if one is detected). Only reports first break point if more than one is detected (back-transformed)seg.bk.SE : Segmented regression break point standard error (back-transformed)seg.R2 : Segmented R2seg.R2.adj : Segmented adjusted R2seg.RMSE : Segmented Root Mean Square Errorseg.AIC: Segmented AICseg.BIC: Segmented BICseg.LogLik: Segmented log-likelihood (used to calculate AIC and BIC)Q50 : Median discharge value used in function inputmoatar.beta.inf : Slope of C-Q below the median discharge valuemoatar.R2.inf : R2 of C-Q below the median discharge valuemoatar.R2.adj.inf : Adjusted R2 of C-Q below the median discharge valuemoatar.RMSE.inf : Root Mean Square Error of C-Q below the median discharge valuemoatar.beta.sup : Slope of C-Q above the median discharge valuemoatar.R2.sup : R2 of C-Q above the median discharge valuemoatar.R2.adj.sup : Adjusted R2 of C-Q above the median discharge valuemoatar.RMSE.sup : Root Mean Square Error of C-Q above the median discharge valuemoatar.C_Q50 : Averaged predicted concentration at median discharge# read discharge data
dat.Q=read.csv("./Data/M786.2C_Q_WRTDS.csv")
dat.Q$Date=as.Date(dat.Q$Date)# identify date
# read concentration data
dat.C=read.csv("./Data/M786.2C_Si_WRTDS.csv")
dat.C$Date=as.Date(dat.C$Date)# identify date
# merge the C and Q data
dat.CQ=merge(dat.Q,dat.C,"Date")
Q50=median(dat.Q$Q,na.rm=T)
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,pch=19,ylim=c(1,10),
legend.pos = "bottomright",
ylab="Si Concentration (mg/L)",
xlab="Discharge (cfs)")
C-Q relationship of M786.2 (UMR; all data) and all models
## LL.R2 LL.R2.adj LL.RMSE LL.beta LL.beta.LCI LL.beta.UCI
## 1 0.0009164036 -0.001813333 0.3739276 0.01538679 -0.03683499 0.06760857
## LL.beta.SE LL.beta.tval LL.beta.pval LL.alpha LL.AIC LL.BIC LL.LogLik
## 1 0.02655615 0.579406 0.5626717 1.560551 324.3351 336.0594 -159.1676
## LL.assumpt.pass seg.bk.est seg.bk.SE seg.R2 seg.R2.adj seg.RMSE seg.AIC
## 1 No 14900.55 1.334313 0.01539102 0.00727611 0.3722274 322.9646
## seg.BIC seg.LogLik Q50 moatar.beta.inf moatar.beta.inf.tval
## 1 342.505 -156.4823 18000 -0.2480301 -2.298768
## moatar.beta.inf.pval moatar.R2.inf moatar.R2.adj.inf moatar.RMSE.inf
## 1 0.0229372 0.03492981 0.02831974 0.4274229
## moatar.beta.sup moatar.beta.sup.tval moatar.beta.sup.pval moatar.R2.sup
## 1 0.02826071 0.6106936 0.5420415 0.0017157
## moatar.R2.adj.sup moatar.RMSE.sup moatar.C_Q50 CVc_CVq
## 1 -0.002884689 0.3277656 5.130173 0.3462364
Here is an example of plotting each individual model without individual data points, and output suppressed.
par(mar=c(2,4,1,1),oma=c(3,1,1,1),mgp=c(2,1,0))
layout(matrix(1:3,3,1))
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,ylim=c(1,10),
legend.pos = "bottomright",
ylab="Si Concentration (mg/L)",
xlab="Discharge (cfs)",
type="n",
models="log-linear",
print.rslt=F)
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,ylim=c(1,10),
legend.pos = "bottomright",
ylab="Si Concentration (mg/L)",
xlab="Discharge (cfs)",
type="n",
models="segmented",
print.rslt=F)
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,ylim=c(1,10),
legend.pos = "bottomright",
ylab="Si Concentration (mg/L)",
xlab="Discharge (cfs)",
type="n",
models="moatar",
print.rslt=F,
xpd=NA)
C-Q relationship of M786.2 (UMR; all data) and all models with individual data points removed from the plot
# read discharge data
dat.Q=read.csv("./Data/MPR_Q_WRTDS.csv")
dat.Q$Date=as.Date(dat.Q$Date)# identify date
# read concentration data
dat.C=read.csv("./Data/MPR_Si_WRTDS.csv")
dat.C$Date=as.Date(dat.C$Date)# identify date
# merge the C and Q data
dat.CQ=merge(dat.Q,dat.C,"Date")
Q50=median(dat.Q$Q,na.rm=T)
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,pch=19,ylim=c(1,50),
legend.pos = "bottomright",
ylab="Si Concentration (mg/L)",
xlab="Discharge (cfs)",
main="LUQ MPR")
C-Q relationship of MPR (LUQ; all data) and all models
## LL.R2 LL.R2.adj LL.RMSE LL.beta LL.beta.LCI LL.beta.UCI LL.beta.SE
## 1 0.2607266 0.2601216 0.3397364 -0.2674487 -0.2927239 -0.2421736 0.01288294
## LL.beta.tval LL.beta.pval LL.alpha LL.AIC LL.BIC LL.LogLik LL.assumpt.pass
## 1 -20.75992 3.084079e-82 3.928898 834.7354 850.065 -414.3677 No
## seg.bk.est seg.bk.SE seg.R2 seg.R2.adj seg.RMSE seg.AIC seg.BIC
## 1 56.92265 1.222207 0.2709456 0.2691529 0.3376566 821.6979 847.2473
## seg.LogLik Q50 moatar.beta.inf moatar.beta.inf.tval moatar.beta.inf.pval
## 1 -405.849 35 -0.2005908 -4.499335 8.188038e-06
## moatar.R2.inf moatar.R2.adj.inf moatar.RMSE.inf moatar.beta.sup
## 1 0.03263879 0.03102652 0.3482593 -0.3427692
## moatar.beta.sup.tval moatar.beta.sup.pval moatar.R2.sup moatar.R2.adj.sup
## 1 -15.74424 4.156418e-47 0.2892829 0.2881159
## moatar.RMSE.sup moatar.C_Q50 CVc_CVq
## 1 0.3179681 20.55023 0.2016571
par(mar=c(2,4,1,1),oma=c(3,1,1,1),mgp=c(2,1,0))
layout(matrix(1:3,3,1))
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,ylim=c(1,50),
legend.pos = "bottomright",
ylab="Si Concentration (mg/L)",
xlab="Discharge (cfs)",
type="n",
models="log-linear",
print.rslt=F)
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,ylim=c(1,50),
legend.pos = "bottomright",
ylab="Si Concentration (mg/L)",
xlab="Discharge (cfs)",
type="n",
models="segmented",
print.rslt=F)
CQ_fun(log(dat.CQ$Q),log(dat.CQ$Si),Q50,plot.CI=T,ylim=c(1,50),
legend.pos = "bottomright",
ylab="Si Concentration (mg/L)",
xlab="Discharge (cfs)",
type="n",
models="moatar",
print.rslt=F,
xpd=NA)
C-Q relationship of MPR (LUQ; all data) and all models with individual data points removed from the plot
dat=read.csv(paste0("./Data/Si_Q_WRTDS_sites.csv"))
C-Q relationship of all sites with available data.
site.vals=unique(dat$site.name)
CQ.rslt=data.frame()
for(i in 1:length(site.vals)){
tmp=subset(dat,site.name==site.vals[i])
tmp$logQ=log(tmp$Q+1)
tmp$logC=log(tmp$Si)
rslt=with(tmp,CQ_fun(logQ,logC,median(Q+1,na.rm=T),plot=F))
rslt$SITE=site.vals[i]
rslt$LTER=unique(tmp$LTER)
CQ.rslt=rbind(CQ.rslt,rslt)
}
head(CQ.rslt,2L)
## LL.R2 LL.R2.adj LL.RMSE LL.beta LL.beta.LCI LL.beta.UCI
## 1 0.0005475968 -0.000491335 0.5144224 -0.01141096 -0.04225563 0.01943371
## 2 0.1508349862 0.144681617 0.7198764 -0.20764344 -0.29057065 -0.12471624
## LL.beta.SE LL.beta.tval LL.beta.pval LL.alpha LL.AIC LL.BIC
## 1 0.01571757 -0.7260005 4.680151e-01 0.08194892 1458.1495 1472.7628
## 2 0.04193954 -4.9510184 2.120389e-06 -0.62016070 309.2592 318.0841
## LL.LogLik LL.assumpt.pass seg.bk.est seg.bk.SE seg.R2 seg.R2.adj
## 1 -726.0747 No 62217.95768 1.521093 0.002792477 -0.0003237965
## 2 -151.6296 Yes 1.17013 1.165232 0.235717577 0.2188584058
## seg.RMSE seg.AIC seg.BIC seg.LogLik Q50 moatar.beta.inf
## 1 0.5143793 1459.9818 1484.3372 -724.9909 12810.0 -0.00047434
## 2 0.6879532 298.5149 313.2231 -144.2574 10.9 -0.02867711
## moatar.beta.inf.tval moatar.beta.inf.pval moatar.R2.inf moatar.R2.adj.inf
## 1 -0.01338789 0.9893239 3.734076e-07 -0.002082959
## 2 -0.20704248 0.8366046 6.393908e-04 -0.014276439
## moatar.RMSE.inf moatar.beta.sup moatar.beta.sup.tval moatar.beta.sup.pval
## 1 0.4781620 0.03248396 0.7249219 0.46885319
## 2 0.8861889 -0.17356137 -2.3325204 0.02268602
## moatar.R2.sup moatar.R2.adj.sup moatar.RMSE.sup moatar.C_Q50 CVc_CVq
## 1 0.001093619 -0.0009874362 0.5485169 0.9618526 0.4177392
## 2 0.075104949 0.0613005449 0.4827239 0.3607766 0.6118868
## SITE LTER
## 1 ALBION NWT
## 2 Andersen Creek at H1 MCM
Download CQ.rslt as Excel file
Log-Linear Slope for each site.
The ratio CV concentration and CV discharge volume for each site.